## Clear every object from the current workspace before the analysis starts.
## NOTE(review): when this script is sourced into an existing session this
## also removes the caller's objects; running in a fresh session avoids that.
rm(list = ls())
## importamos las librerías
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 1.0.1
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.5.0
## ✔ readr 2.1.4 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(DT)
## Load the Titanic passenger data set (titanic3.csv) from GitHub.
path <- "https://raw.githubusercontent.com/lacamposm/Fundamentos_Analitica/main/data/titanic3.csv"
## The file is read as comma-separated with "." as the decimal mark, which is
## exactly what read.csv() does by default. Empty fields become NA and text
## columns are read as factors.
df <- read.csv(path, stringsAsFactors = TRUE, na.strings = "")
## Inspect the column types using the first 10 rows.
str(head(df, 10))
## 'data.frame': 10 obs. of 14 variables:
## $ pclass : int 1 1 1 1 1 1 1 1 1 1
## $ survived : int 1 1 0 0 0 1 1 0 1 0
## $ name : Factor w/ 1307 levels "Abbing, Mr. Anthony",..: 22 24 25 26 27 31 46 47 51 55
## $ sex : Factor w/ 2 levels "female","male": 1 2 1 2 1 2 1 2 1 2
## $ age : num 29 0.917 2 30 25 ...
## $ sibsp : int 0 1 1 1 1 0 1 0 2 0
## $ parch : int 0 2 2 2 2 0 0 0 0 0
## $ ticket : Factor w/ 929 levels "110152","110413",..: 188 50 50 50 50 125 93 16 77 826
## $ fare : num 211 152 152 152 152 ...
## $ cabin : Factor w/ 186 levels "A10","A11","A14",..: 44 80 80 80 80 150 146 16 62 NA
## $ embarked : Factor w/ 3 levels "C","Q","S": 3 3 3 3 3 3 3 3 3 1
## $ boat : Factor w/ 27 levels "1","10","11",..: 12 3 NA NA NA 13 2 NA 27 NA
## $ body : int NA NA NA 135 NA NA NA NA NA 22
## $ home.dest: Factor w/ 369 levels "?Havana, Cuba",..: 309 231 231 231 231 237 162 24 22 229
class(df)
## [1] "data.frame"
## Data dictionary:
##   pclass    Passenger class (1 = 1st; 2 = 2nd; 3 = 3rd).
##   survived  Survival (0 = No; 1 = Yes).
##   name      Name.
##   sex       Sex.
##   age       Age.
##   sibsp     Number of siblings/spouses aboard (wife/fiance).
##   parch     Number of parents/children aboard (parent / child).
##   ticket    Ticket number.
##   fare      Fare paid.
##   cabin     Cabin.
##   embarked  Port of embarkation (C = Cherbourg; Q = Queenstown; S = Southampton).
##   boat      Lifeboat.
##   body      Body identification number.
##   home.dest Destination.

## Recode survived from 0/1 to logical.
df$survived <- as.logical(df$survived)
## Keep survivors whose port of embarkation is known, grouped by survival
## status and passenger class. `survived` is already logical, so it is used
## directly as the filter condition, and missing ports are tested with
## is.na() instead of being compared against the string "NA".
new_df <- df %>%
  filter(survived, !is.na(embarked)) %>%
  group_by(survived, pclass)
str(new_df)
## gropd_df [498 × 14] (S3: grouped_df/tbl_df/tbl/data.frame)
## $ pclass : int [1:498] 1 1 1 1 1 1 1 1 1 1 ...
## $ survived : logi [1:498] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ name : Factor w/ 1307 levels "Abbing, Mr. Anthony",..: 22 24 31 46 51 70 73 93 94 100 ...
## $ sex : Factor w/ 2 levels "female","male": 1 2 2 1 1 1 1 1 2 1 ...
## $ age : num [1:498] 29 0.917 48 63 53 ...
## $ sibsp : int [1:498] 0 1 0 1 2 1 0 0 0 0 ...
## $ parch : int [1:498] 0 2 0 0 0 0 0 0 0 1 ...
## $ ticket : Factor w/ 929 levels "110152","110413",..: 188 50 125 93 77 834 796 119 297 801 ...
## $ fare : num [1:498] 211.3 151.6 26.6 78 51.5 ...
## $ cabin : Factor w/ 186 levels "A10","A11","A14",..: 44 80 150 146 62 98 34 NA 9 49 ...
## $ embarked : Factor w/ 3 levels "C","Q","S": 3 3 3 3 3 1 1 3 3 1 ...
## $ boat : Factor w/ 27 levels "1","10","11",..: 12 3 13 2 27 14 22 18 24 18 ...
## $ body : int [1:498] NA NA NA NA NA NA NA NA NA NA ...
## $ home.dest: Factor w/ 369 levels "?Havana, Cuba",..: 309 231 237 162 22 237 258 NA 158 230 ...
## - attr(*, "groups")= tibble [3 × 3] (S3: tbl_df/tbl/data.frame)
## ..$ survived: logi [1:3] TRUE TRUE TRUE
## ..$ pclass : int [1:3] 1 2 3
## ..$ .rows : list<int> [1:3]
## .. ..$ : int [1:198] 1 2 3 4 5 6 7 8 9 10 ...
## .. ..$ : int [1:119] 199 200 201 202 203 204 205 206 207 208 ...
## .. ..$ : int [1:181] 318 319 320 321 322 323 324 325 326 327 ...
## .. ..@ ptype: int(0)
## ..- attr(*, ".drop")= logi TRUE
## Density of passenger class for the rows in new_df, one curve per port of
## embarkation. The y-axis text and all axis ticks are hidden.
## NOTE(review): the y label reads "Puerto de embarque" although the y axis
## of geom_density() is a density -- confirm the intended label.
titanic <- ggplot(new_df, aes(x = pclass, color = embarked)) +
  geom_density() +
  labs(
    title = "Supervivencia",
    x = "Clase del pasajero",
    y = "Puerto de embarque"
  ) +
  theme(
    axis.ticks = element_blank(),
    axis.text.y = element_blank()
  )
## Render the ggplot object as an interactive plotly widget.
ggplotly(titanic)
## Count the rows of new_df in each of its groups. Note that the count column
## is named `embarked` even though it holds a row count, not a port.
## `.groups = "drop_last"` spells out the default regrouping (the result stays
## grouped by the first grouping variable only) so summarise() no longer
## emits its informational message.
new_df2 <- new_df %>%
  summarise(embarked = n(), .groups = "drop_last")
head(new_df2, 10)
## # A tibble: 3 × 3
## # Groups: survived [1]
## survived pclass embarked
## <lgl> <int> <int>
## 1 TRUE 1 198
## 2 TRUE 2 119
## 3 TRUE 3 181
## Bar chart of the per-class counts stored in new_df2 (the count column is
## called `embarked`). Bars are drawn from the values as-is, and the fill
## vector supplies one colour per bar, so it must match the number of rows.
titanic2 <- ggplot(new_df2, aes(x = pclass, y = embarked)) +
  geom_bar(
    stat = "identity",
    fill = c("white", "green", "blue")
  ) +
  labs(
    title = "Supervivencia",
    x = "Clase del pasajero",
    y = "Puerto de embarque"
  ) +
  theme(
    axis.ticks = element_blank(),
    axis.text.y = element_blank()
  )
## Render the ggplot object as an interactive plotly widget.
ggplotly(titanic2)
## Survivors whose port of embarkation is known, grouped by port and class.
## `survived` is logical at this point, so it is used directly as the filter
## condition; missing ports are tested with is.na() rather than compared
## against the string "NA".
new_df3 <- df %>%
  filter(survived, !is.na(embarked)) %>%
  group_by(embarked, pclass)
## Stacked bar chart of row counts per passenger class, filled by port of
## embarkation. The y-axis text and all axis ticks are hidden.
titanic3 <- ggplot(new_df3) +
  aes(x = pclass, fill = embarked) +
  geom_bar(color = "black") +
  labs(y = "embarque", x = "clase", title = "Supervivencia / clase") +
  theme(axis.text.y = element_blank(), axis.ticks = element_blank())
## Render the ggplot object as an interactive plotly widget.
ggplotly(titanic3)
## Survivors whose port of embarkation is known, grouped by port, class and
## sex. This overwrites the previous new_df3. `survived` is logical at this
## point, so it is used directly as the filter condition; missing ports are
## tested with is.na() rather than compared against the string "NA".
new_df3 <- df %>%
  filter(survived, !is.na(embarked)) %>%
  group_by(embarked, pclass, sex)
## Stacked bar chart of row counts per port of embarkation, filled by sex.
## geom_bar() computes the bar heights by counting rows, so only `x` is
## mapped. The former `Y = pclass` mapping was not a valid aesthetic (names
## are case-sensitive) and was dropped by ggplot2 with a warning.
titanic4 <- ggplot(new_df3) +
  aes(x = embarked, fill = sex) +
  geom_bar(color = "black") +
  labs(y = "Cantidad", x = "Embarque", title = "Supervivencia / Genero") +
  theme(axis.text.y = element_blank(), axis.ticks = element_blank())
## Render the ggplot object as an interactive plotly widget.
ggplotly(titanic4)
## Puerto de embarque (C = Cherburgo; Q = Queenstown; S = Southampton).